* THIS PROGRAM MERGES THE SOD and CALL REPORT DATA used to construct the BH measures for Cooper and Peek's RESTAT paper *

local out2020_dir <place your filepaths here>
cd "`out2020_dir'"


use bh-01b-call,clear 	//load call report data constructed in the "01b" step do file


*keep the relevant variables
local date_vars 	"dated dateq year q"			
local id_vars 		"entity rssd9348 rcon9804" 		


#delimit ;
local ratios 	
	renpl_ratio 
	npl_ratio   
	npa_ratio_2 
	renpa_ratio_2 
;
#delimit cr

keep  `date_vars' `id_vars' `ratios'
order `date_vars' `id_vars' `ratios'


ren entity  rssdid

*-----MERGE ANNUAL, BRANCH-LEVEL DEPOSITS TO QUARTERLY BANK-LEVEL CALL REPORT FINANCIALS

	/*	This step:
		1) Breaks call report data into 4 tempfiles, one per quarter (eg all years for Q1).
		2) Drops duplicate observations by year & bank-id. 
		3) Match-merges these 4 files into SOD to effectively convert the annual SOD to a quarterly frequency.
			-'MANY' SOD Branches to 'ONE' year-qtr-bank obs. (Although, because the datasets are already divided by quarters the merge is made by year and bank-id) 
			
		Note: 	This step subsets the data (via the match-merge) to where both regulatory forms are present. In other words, to be in our sample, a bank 
				must file both a call report (in a given quarter) as well as SOD in the relevant year.
		Note: 	SOD data are as of June (Q2-YYYY) of a given year.
		Note: 	Treating Q1-YYYY, Q3-YYYY, Q4-YYYY from call report as best match to Q2-YYYY Deposits data.*/
	

	forval x=1/4 {
		preserve 
			keep if q==`x'
			duplicates drop year rssdid,force
			
			*#############
			tempfile callqtr`x'
			save 	`callqtr`x''
			*#############

		restore
	}

	clear

	
	
* merge quarterly datasets into SOD data.

	forval x = 1/4 {

		use bh-01c-SOD-wGeocodes,clear  //load cleaned SOD data (many branch-observations for a given bank)

		
	 *match-merging the "many" SOD (branch) observations per bank to the bank's unique quarterly call report file. 	

	 
		*>>>>>>>>>>>>>>>>>>
		merge m:1 year rssdid using `callqtr`x'',keep(3) nogen 
		//Keeps only the matches--observations with SOD And call report data in a given quarter
		*>>>>>>>>>>>>>>>>>>
		
		
	*save merged (quarterly) datasets
	*will be appended below, after a small detour to create the bank-minimum rule from the Q2 file.
	
			*#################################
			save bh-02a-callsodqtr`x',replace 
			*#################################
	}

	
	

***************IMPLEMENT BANK MINIMUM RULE


		*-------------------------------------------------------------------------------
		/*
		This step studies when msa's have less than 5 banks.
		The file 'bh-02a-callsodqtr2' is the quarter-2 merge from above- i.e., it only contains 
		banks who have both a SOD & call report filing in Q2 (SOD is annually filed in June).
		The final product of this step will be to recode msa locations to rural if they don't have enough banks.
		The recode will have a value msabr==0 : nonMSA/rural.
			
		*see comments below as well as bhreadme.docx for more.*/
		*-------------------------------------------------------------------------------		
		
				
		use bh-02a-callsodqtr2,clear


		*don't need all vars - just looks at how many banks (not branches) are in an msa in a given year
		keep rssdid year msabr  

		*not counting rural locations - already rural
		drop if msabr==0

		*remove any duplicates
		duplicates drop year msabr rssdid,force

		gen dumsum=1

		*sum number of banks in each location (msa) and year
		bys year msabr:egen bnk_count=count(dumsum)

		*just need location level data after counting banks
		duplicates drop year msabr,force

		drop rssdid

		
		
		
		*======================================================================
		/*PART 1 OVERVIEW:
			generates a percentage of 'valid' years for a given msa.
			-(# of Years Location has >= 5 banks) / (Location's Total # of Years)
			-if 50% of location's time series has less than 5 banks:
				-then send to nonMSA/rural*/	
				
		*gen flag for Banks LE 5
		//The following step flags USABLE observations, i.e., in a given year, there are more than 4 banks in an MSA.
		gen 	MSA_GE5=0
		replace MSA_GE5=1 if bnk_count>=5   		
		la var  MSA_GE5 "Usable (ge 5 banks) observations"
		
		*set condition for tsspell to track patterns of consecutive observations that meet our >=5 rule
		tsset msabr year
		tsspell,pcond(bnk_count>=5) 		//ssc install tsspell
		
		
		rename _seq 	GE5_seq    		//cumulatively sums CONSECUTIVE PERIODS location has at least 5 banks 	- see var 'numperiods_GE5' below
		rename _spell 	GE5_spell		//counts GROUPINGS of consecutive periods summed above with GE5_seq 	- see var 'tot_runs_INSAMPLE' below

		

		*numerator
		bys msabr: egen tot_MSA_GE5=total(MSA_GE5)     	//count usable periods - (# of Years Location has >= 5 banks)


		*denominator
		bys msabr: egen tot_MSA_obs=total(dumsum)		//count all periods - (Location's Total # of Years)


		*gen share of usable obs (GE5)
		gen MSA_GE5_share=tot_MSA_GE5/tot_MSA_obs  		//(# of Years Location has >= 5 banks) / (Location's Total # of Years)
		
	
		
		
		/*---
		PART 2 OVERVIEW:
		Of the locations that have 50% of their observations with enough banks,
		we want to also make sure that these (year-msabr) observations stay in sample for at least 4 years.	
		If msa appears 'mostly' eligible the next step is to ensure that a location does not have 5 banks
		and then 4 banks every other year. Such "dancing" above and below the threshold over time,
		makes time series analysis of that location difficult.
		---*/
		
		*total number of runs/spells - in our sample
		egen tot_runs_INSAMPLE=max(GE5_spell),by(msabr)							

		*length of each spell
		egen numperiods_GE5 =max(GE5_seq),by(msabr GE5_spell) 
		
		

		*Flag locations with too few years with an acceptable number of banks - for recode to nMSA/rural
		gen 	SendingToRural_LT50=0
		replace SendingToRural_LT50=1 		if MSA_GE5_share<.5 



		*Flag locations that pop in and out of sample too much - for recode to nMSA/rural
		gen 	SendingToRural_LT4yrRun=0
		replace SendingToRural_LT4yrRun=1 	if numperiods_GE5<4

		replace MSA_GE5=0 					if SendingToRural_LT4yrRun==1
		

		*grab msa code of locations affected by either rule for possible reference / validation later
		gen archGeoB4_LT50_Recode=msabr 	if SendingToRural_LT50==1
		gen archGeoB4_LT4yrRun_Recode=msabr if SendingToRural_LT4yrRun==1
		
		
		#delimit ;
			local keeplist 
			year  msabr  bnk_count  MSA_GE5_share
			SendingToRural_LT50  	archGeoB4_LT50_Recode
			SendingToRural_LT4yrRun archGeoB4_LT4yrRun_Recode  
		;
		#delimit cr
		
		keep   `keeplist'
		

		duplicates drop year msabr,force  

				*#############################################
				tempfile bh_02a_bankMinRule
				save 	`bh_02a_bankMinRule',replace
				*#############################################
	
	
	
	

*APPEND THE 4 QUARTERLY FILES MADE ABOVE - i.e., bh-02a-callsodqtr1<-4>.dta

/*NOTE: the bank minimum rule DOES NOT REMOVE observations, it merely re-classifies
a given location's deposits as rural if necessary- therefore, we append the original quarterly files and then
merge in the relevant flags from the bank min rule.*/


clear
		
		
	*#################################
	tempfile bh_02a_AppendMrgdCallSOD
	save 	`bh_02a_AppendMrgdCallSOD', emptyok
	*#################################
	


	*|||||||||||||||||||
	append using bh-02a-callsodqtr1
	append using bh-02a-callsodqtr2 	
	append using bh-02a-callsodqtr3	
	append using bh-02a-callsodqtr4
	*|||||||||||||||||||
	
		
*MERGE BANK MIN FLAG INTO  MAIN CALL/SOD DATASET	
	*>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
	merge m:1 year msabr using `bh_02a_bankMinRule',assert(1 3) keep(1 3) nogen
		//_merge==1 is nMSA/rural locations from master that were excluded from bank minimum analysis
	*<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
	
	
*ADJUST LOCATIONS THAT MEET BANK MIN RULES - SEND TO RURAL	
replace msabr = 0 if SendingToRural_LT4yrRun==1 | SendingToRural_LT50==1		


* SAVE merged SOD/Call report data with minimum bank flags

		*#################################
		save bh-02a-masterCallSOD,replace 
		*#################################


*optional directory cleaning:
// local eraseCallQtrs "bh-02a-callsodqtr1 bh-02a-callsodqtr2 bh-02a-callsodqtr3 bh-02a-callsodqtr4"
// foreach f of local eraseCallQtrs {
// 	erase `f'.dta
// }

























